Hi! Welcome to where my work is :) You can scroll through chronologically, or skip to particular sections you are interested in from the navigation bar on the left - please give it a while to load then mouse-over it.

1. Loading and joining data

Set working directory first

Loading required libraries

library(sf)
library(rgdal)
library(raster)
library(mapview)
library(spatstat)
library(sp)
library(rgeos)
library(maptools)
library(GISTools)
library(tmap)
library(sf)
library(geojson)
library(geojsonio)
library(tmaptools)

Opening shapefile, taken from ukdataservice.ac.uk, giving a map of Greater Manchester by 2011 English Census Merged Wards (hence data for basemap shapefile is by ward level).

require(rgdal)
require(ggplot2)
shp <- readOGR(dsn ="BoundaryData", layer="england_cmwd_2011", stringsAsFactors = F)
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Tommy\Desktop\GIS_proj\Github\CASA0005GISProj\BoundaryData", layer: "england_cmwd_2011"
## with 215 features
## It has 4 fields
plot(shp)

#checking Coordinate Reference System of the shapefile, which was originally NA
summary(shp)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##        min      max
## x 351662.6 406087.2
## y 381165.4 421037.7
## Is projected: TRUE 
## proj4string :
## [+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000
## +y_0=-100000 +datum=OSGB36 +units=m +no_defs +ellps=airy
## +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894]
## Data attributes:
##     label             altname              name          
##  Length:215         Length:215         Length:215        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##      code          
##  Length:215        
##  Class :character  
##  Mode  :character
crs(shp)
## CRS arguments:
##  +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000
## +y_0=-100000 +datum=OSGB36 +units=m +no_defs +ellps=airy
## +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894

Specifying CRS of the greater manchester area shapefile as EPSG 4326 for latitude/longiture

Manchester <- spTransform(shp, CRS("+init=epsg:4326"))
st_crs(Manchester)
## Coordinate Reference System:
##   EPSG: 4326 
##   proj4string: "+proj=longlat +datum=WGS84 +no_defs"

Plot Manchester basemap

plot(Manchester)

Extract just the city of Manchester itself rather than greater Manchester, as the area would otherwise be too big for analysis and Ripley’s K.

#extract the city
cityManchester <- subset(Manchester, name %in% c("Didsbury West","Fallowfield","Gorton North","Gorton South","Harpurhey","Higher Blackley","Hulme","Levenshulme","Longsight","Miles Platting and Newton Heath","Moss Side","Moston","Northenden","Old Moat","Rusholme","Sharston","Whalley Range","Withington","Woodhouse Park","Ancoats and Clayton","Ardwick","Baguley","Bradford","Brooklands","Burnage","Charlestown","Cheetham","Chorlton","Chorlton Park","City Centre","Crumpsall","Didsbury East"))

#Check to see that the correct borough has been pulled out
tm_shape(cityManchester) +
  tm_polygons(col = NA, alpha = 0.5)

Read in police station data for greater manchester area, from the geoJSON file obtained using overpass-turbo.eu API and running a query for police stations in the Greater Manchester Area from OpenStreetMap.

Keeping only the points, as readOGR cannot read both points and polygons at the same time, and the points represent police stations.

pol <- rgdal::readOGR("C:/Users/Tommy/Desktop/GIS_proj/BoundaryData/polstn.geojson",require_geomType="wkbPoint")
## OGR data source with driver: GeoJSON 
## Source: "C:\Users\Tommy\Desktop\GIS_proj\BoundaryData\polstn.geojson", layer: "polstn"
## with 114 features;
## Selected wkbPoint feature type, with 51 rows
## It has 42 fields

Plotting the police station points

plot(pol)

summary(pol)
## Object of class SpatialPointsDataFrame
## Coordinates:
##                min       max
## coords.x1 -2.89431 -1.741055
## coords.x2 53.25742 53.778362
## Is projected: FALSE 
## proj4string :
## [+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0]
## Number of points: 51
## Data attributes:
##                id                  X.id      amenity     building 
##  node/1123287516: 1   node/1123287516: 1   police:51   office: 0  
##  node/1131472551: 1   node/1131472551: 1               yes   : 0  
##  node/1290032691: 1   node/1290032691: 1               NA's  :51  
##  node/1429962455: 1   node/1429962455: 1                          
##  node/1865698140: 1   node/1865698140: 1                          
##  node/233211757 : 1   node/233211757 : 1                          
##  (Other)        :45   (Other)        :45                          
##            type     addr.postcode           addr.street
##  multipolygon: 0   OL7 0BQ : 1    Grappenhall Road: 1  
##  NA's        :51   PR25 2EX: 1    Lancastergate   : 1  
##                    WA4 2AF : 1    Manchester Road : 1  
##                    WA6 7NW : 1    Ship Street     : 1  
##                    BL1 8UN : 0    Spendmore Lane  : 1  
##                    (Other) : 0    (Other)         : 1  
##                    NA's    :47    NA's            :45  
##           contact.phone                 contact.website
##  +44 161 872 5050: 0    http://www.gmp.police.uk: 0    
##  NA's            :51    NA's                    :51    
##                                                        
##                                                        
##                                                        
##                                                        
##                                                        
##                                name              opening_hours
##  'B' Divisional Headquarters     : 1   Mo-Sa 10:00-18:00: 0   
##  Adlington                       : 1   NA's             :51   
##  Altrincham Police Station       : 1                          
##  Ashton under lyne Police Station: 1                          
##  Bury Police Station             : 1                          
##  (Other)                         :19                          
##  NA's                            :27                          
##                 note    building.colour building.levels building.material
##  Layout appoximate: 0   wheat: 0        2   : 0         brick: 0         
##  NA's             :51   NA's :51        NA's:51         NA's :51         
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##   height   roof.colour roof.material roof.shape
##  4   : 0   grey: 0     gravel: 0     flat : 0  
##  NA's:51   NA's:51     NA's  :51     round: 0  
##                                      NA's :51  
##                                                
##                                                
##                                                
##                                                
##                     source                        addr.city 
##  OS_OpenData_StreetView: 2   Frodsham, Cheshire        : 1  
##  OS OpenData StreetView: 1   Huddersfield              : 1  
##  survey                : 1   Leyland                   : 1  
##  Bing                  : 0   Manchester                : 1  
##  Bing aerial image     : 0   Stockton Heath, Warrington: 1  
##  (Other)               : 0   (Other)                   : 0  
##  NA's                  :47   NA's                      :46  
##               phone      fhrs.id                        operator 
##  +44 161 856 5629: 0   335284: 0   Cheshire Constabulary    : 0  
##  +44 161 872 5050: 1   NA's  :51   Cheshire Police          : 2  
##  0845 458 6392   : 0               Greater Manchester Police: 1  
##  NA's            :50               Lancashire Constabulary  : 1  
##                                    NA's                     :47  
##                                                                  
##                                                                  
##  wheelchair                                addr.housename source.building
##  yes : 1    Atherton Police Station               : 0     Bing: 0        
##  NA's:50    Cheshire Constabulary - Police Station: 1     NA's:51        
##             Police Headquarters                   : 0                    
##             Stockton Heath Police Station         : 1                    
##             NA's                                  :49                    
##                                                                          
##                                                                          
##  source.geometry
##  Bing: 0        
##  NA's:51        
##                 
##                 
##                 
##                 
##                 
##                                                       designation
##  Stockport Divisional Headquarters Greater Manchester Police: 0  
##  NA's                                                       :51  
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  roof.levels addr.housenumber
##  1   : 0     37  : 0         
##  NA's:51     NA's:51         
##                              
##                              
##                              
##                              
##                              
##                                                                                                 website  
##  http://www.gmp.police.uk                                                                           : 0  
##  https://www.cheshire.police.uk/contact-us/police-stations-and-custody/northwich-police-station.aspx: 0  
##  NA's                                                                                               :51  
##                                                                                                          
##                                                                                                          
##                                                                                                          
##                                                                                                          
##           created_by     is_in     listed_status
##  Potlatch 0.10b: 1   Fulwood: 1   Grade II: 2   
##  Potlatch 0.6a : 1   NA's   :50   NA's    :49   
##  NA's          :49                              
##                                                 
##                                                 
##                                                 
##                                                 
##                            wikimedia_commons      wheelchair.description
##  File:Police Station, Warrington.jpg: 1      No Longer in use: 1        
##  NA's                               :50      NA's            :50        
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##  entrance 
##  yes : 1  
##  NA's:50  
##           
##           
##           
##           
##           
##                                                                                                                                                                                         description
##  Opening Hours: Monday: 8am to 11pm Tuesday: 8am to 11pm Wednesday: 8am to 11pm Thursday: 8am to 11pm Friday: 8am to 11pm Saturday: 8am to 11pm Sunday: 8am to 11pm Bank Holidays: 8am to 11pm: 1  
##  NA's                                                                                                                                                                                         :50  
##                                                                                                                                                                                                    
##                                                                                                                                                                                                    
##                                                                                                                                                                                                    
##                                                                                                                                                                                                    
##                                                                                                                                                                                                    
##            ele      level        HE_ref  
##  Street Level: 1   0   : 1   1240198: 1  
##  NA's        :50   NA's:50   NA's   :50  
##                                          
##                                          
##                                          
##                                          
## 

Checking the CRS of the police station points - it is already EPSG4326!

st_crs(pol)
## Coordinate Reference System:
##   EPSG: 4326 
##   proj4string: "+proj=longlat +datum=WGS84 +no_defs"

Removing duplicates from police station points

polstns <- remove.duplicates(pol)

Plotting the police station points on top of the greater manchester area map using mapview

mapview::mapview(Manchester) + mapview::mapview(polstns, color = "white", col.regions = "black",legend=FALSE)

Basemap of Manchester is a SpatialPolygonsDataFrame Police station points is a SpatialPointsDataFrame

class(cityManchester)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
class(polstns)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"

Plot the police stations in the Greater Manchester area using tmap

tmap_mode("view")
tm_shape(Manchester) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polstns) +
  tm_dots(col = "blue")

Plot the police stations in the Manchester city area using tmap

tmap_mode("view")
tm_shape(cityManchester) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polstns) +
  tm_dots(col = "blue")

Filtering for police stations which are within the Manchester city boundary

proj4string(cityManchester) <- CRS("+init=epsg:4326")
proj4string(polstns) <- CRS("+init=epsg:4326")

polsub <- polstns[cityManchester,]
#checking to see that they've been removed
tmap_mode("view")
tm_shape(cityManchester) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
  tm_dots(col = "blue")

Reading in the dataset for population density

library(readr)
popdens <- read_csv("popdens.csv")
summary(popdens)
##   geography            popdens      
##  Length:32          Min.   : 12.00  
##  Class :character   1st Qu.: 37.40  
##  Mode  :character   Median : 47.95  
##                     Mean   : 52.83  
##                     3rd Qu.: 66.53  
##                     Max.   :116.70

Joining first dataset of population density with the Manchester basemap

require(sp)
?sp::merge
manchester_popdens <- merge(cityManchester, popdens, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)

Plotting map to test if join has succeeded

tmap_mode("view")
tm_shape(manchester_popdens) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
  tm_dots(col = "blue")

Reading in the dataset for deprivation - the average number (out of 4 - employment, education, health and disability and household overcrowding) of dimensions a household was deprived in was used

depr <- read_csv("deprivation.csv")
summary(depr)
##   geography          deprivation    
##  Length:32          Min.   :0.5989  
##  Class :character   1st Qu.:0.9484  
##  Mode  :character   Median :1.1398  
##                     Mean   :1.0764  
##                     3rd Qu.:1.2323  
##                     Max.   :1.4070

Joining second dataset of deprivation with the Manchester basemap

manchester_2 <- merge(manchester_popdens, depr, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)

Plotting map to check if second join has succeeded

tmap_mode("view")
tm_shape(manchester_2) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
  tm_dots(col = "blue")

Reading in the dataset for SES - the average social grade by ward was taken, by using a numeric scale of AB = 1, C1 = 2, C2 = 3, DE = 4, so lower numbers would mean a higher social grade.

ses <- read_csv("SES.csv")
summary(ses)
##   geography          socialgrade   
##  Length:32          Min.   :1.739  
##  Class :character   1st Qu.:2.404  
##  Mode  :character   Median :2.744  
##                     Mean   :2.620  
##                     3rd Qu.:2.896  
##                     Max.   :3.119

Joining third dataset of SES with the Manchester basemap

manchester_merged <- merge(manchester_2, ses, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)

Plotting map to check if third join has succeeded

tmap_mode("view")
tm_shape(manchester_merged) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
  tm_dots(col = "blue")

manchester_merged is the map of Manchester city with merged data on population density, social grade and SES.

2. Plotting/performing KDE for crime data in Manchester city

Next step would be to map the crime datapoints onto the Manchester basemap. Crime data for June 2019 for the Greater Manchester area was used. This was taken from https://data.police.uk/data/

crime <- read_csv("manchester_crime.csv")
crime_locations <- st_as_sf(crime, coords = c("Longitude","Latitude"), crs = 4326)
class(crime)
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
require(sf)
crime_sf <- st_as_sf(x = crime, 
                        coords = c("Longitude", "Latitude"),
                        crs = "+init=epsg:4326")
# simple plot
plot(crime_sf)

crime_spdf <- as(crime_sf, "Spatial")


manchester_crime_test<-ggplot(crime, aes(x=Longitude,y=Latitude))+geom_point()+coord_equal()
manchester_crime_test

Plotting a 2D KDE of crime occurences in Manchester using the longitude and latitude data in the crime dataset

manchester_crime_test<-ggplot(crime, aes(x=Longitude,y=Latitude))+stat_bin2d(bins=30)
manchester_crime_test

Plotting a continuous distribution instead

manchester_crime_test+stat_density2d(aes(fill = ..level..), geom="polygon")

Plotting the crime datapoints to test

plot(crime_locations)

Examining metadata of the crime datapoints

summary(crime_locations)
##           geometry   
##  POINT        :8828  
##  epsg:4326    :   0  
##  +proj=long...:   0

Checking CRS of crime datapoints

crs(crime_locations)
## [1] "+proj=longlat +datum=WGS84 +no_defs"

Checking type of data of the crimem locations - sf, tbl_df, tbl, data.frame

class(crime_locations)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"

Ensuring that crime_locations has EPSG 4326 as CRS

crime_locations_sp <- as(crime_locations, 'Spatial')
crime_locations_sp <- spTransform(crime_locations_sp, CRS("+init=epsg:4326"))

Checking new CRS of crime datapoints and data type, removing duplicates

crs(crime_locations)
## [1] "+proj=longlat +datum=WGS84 +no_defs"
class(crime_locations)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"

Plotting crime data points on Manchester basemap

mapview::mapview(manchester_merged) + mapview::mapview(crime_locations, color = "white", col.regions = "black",legend=FALSE, title = "Crime occurences in Manchester city")

3. Descriptive statistics for data

library(tidyverse)
library(downloader)
library(rgdal)
library(sf)
library(ggplot2)
library(reshape2)
library(plotly)
library(highcharter)

Histograms for each of the scale variables - population densities, deprivation, and SES Starting with popdens

# Histogram with mean line (red) and median line (blue) and density plot
ggplot(popdens, aes(x=popdens)) + 
 geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth=10)+
 geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(popdens)),
            color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(popdens)),
            color="blue", linetype="dashed", size=1) 

# Descriptive statistics
print(max(popdens$popdens))
## [1] 116.7
print(min(popdens$popdens))
## [1] 12
print(median(popdens$popdens))
## [1] 47.95
print(sd(popdens$popdens))
## [1] 22.6814
print(IQR(popdens$popdens))
## [1] 29.125
print(quantile(popdens$popdens, c(.25, .75)))
##    25%    75% 
## 37.400 66.525

Plotting histogram for deprivation next

# Histogram with mean line (red) and median line (blue) and density plot
ggplot(depr, aes(x=deprivation)) + 
 geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth= 0.1)+
 geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(deprivation)),
            color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(deprivation)),
            color="blue", linetype="dashed", size=1) 

# Descriptive statistics
print(max(depr$deprivation))
## [1] 1.406953
print(min(depr$deprivation))
## [1] 0.5988858
print(median(depr$deprivation))
## [1] 1.139765
print(sd(depr$deprivation))
## [1] 0.2171107
print(IQR(depr$deprivation))
## [1] 0.283867
print(quantile(depr$deprivation, c(.25, .75)))
##       25%       75% 
## 0.9484226 1.2322897

Plotting histogram for social grade next

# Histogram with mean line (red) and median line (blue) and density plot
ggplot(ses, aes(x=socialgrade)) + 
 geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth= 0.1)+
 geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(socialgrade)),
            color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(socialgrade)),
            color="blue", linetype="dashed", size=1) 

# Descriptive statistics
print(max(ses$socialgrade))
## [1] 3.118662
print(min(ses$socialgrade))
## [1] 1.738781
print(median(ses$socialgrade))
## [1] 2.744045
print(sd(ses$socialgrade))
## [1] 0.3916912
print(IQR(ses$socialgrade))
## [1] 0.4920259
print(quantile(ses$socialgrade, c(.25, .75)))
##      25%      75% 
## 2.403788 2.895813

Plotting histogram for crimes recorded per ward next

#reading in data for crimes recorded per ward (taken from later part of the code after performing count)
library(readr)
crime_ward <- read_csv("crime_ward.csv")
summary(crime_ward)
##   geography             crimes      
##  Length:32          Min.   :  96.0  
##  Class :character   1st Qu.: 145.2  
##  Mode  :character   Median : 183.0  
##                     Mean   : 275.7  
##                     3rd Qu.: 330.8  
##                     Max.   :1887.0
crime_ward$lnCrime <- log1p(crime_ward$crimes)
# Histogram with mean line (red) and median line (blue) and density plot
ggplot(crime_ward, aes(x=lnCrime)) + 
 geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth= 0.05)+
 geom_density(alpha=.2, fill="#FF6666") + geom_vline(aes(xintercept=mean(lnCrime)),
            color="red", linetype="dashed", size=1) + geom_vline(aes(xintercept=median(lnCrime)),
            color="blue", linetype="dashed", size=1) 

# Descriptive statistics
print(max(crime_ward$crimes))
## [1] 1887
print(min(crime_ward$crimes))
## [1] 96
print(median(crime_ward$crimes))
## [1] 183
print(sd(crime_ward$crimes))
## [1] 313.0229
print(IQR(crime_ward$crimes))
## [1] 185.5
print(quantile(crime_ward$crimes, c(.25, .75)))
##    25%    75% 
## 145.25 330.75

Converting both cityManchester and crime_locations to data.frames to run the KDE, since it requires numeric input

df_cityManchester <- as.data.frame(cityManchester)
df_crime_locations <- as.data.frame(crime_locations)

Create two separate fields for latitude and longitude from the manchester spatialpolygonsdataframe

polys = attr(cityManchester,'polygons')
npolys = length(polys)
for (i in 1:npolys){
  poly = polys[[i]]
  polys2 = attr(poly,'Polygons')
  npolys2 = length(polys2)
  for (j in 1:npolys2){
     #do stuff with these values
     coords = coordinates(polys2[[j]])
     
  }
}
coords_df <- as.data.frame(coords)

Plotting out crime occurences in Manchestaer city

ggplot() + geom_polygon(data = coords_df, aes(x = V1, y = V2), fill = "grey75") +
  geom_point(data = crime, aes(x = Longitude, y = Latitude),
             col = "dodger blue", alpha = .5, size = 1.5) +
  coord_equal() +
  ggtitle("Crime occurences in Manchester")

Counting number of crime occurence points per polygon

class(manchester_merged)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
class(crime_spdf)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
proj4string(manchester_merged)
## [1] "+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
crime_spdf <- spTransform(crime_spdf, CRS("+init=epsg:4326"))
proj4string(crime_spdf)
## [1] "+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
plot(manchester_merged)
plot(crime_spdf, col="red" , add=TRUE)

res <- over(crime_spdf, manchester_merged)
table(res$name)
## 
##             Ancoats and Clayton                         Ardwick 
##                             484                             368 
##                         Baguley                        Bradford 
##                             179                             333 
##                      Brooklands                         Burnage 
##                             136                             149 
##                     Charlestown                        Cheetham 
##                             176                             425 
##                        Chorlton                   Chorlton Park 
##                             108                             137 
##                     City Centre                       Crumpsall 
##                            1887                             270 
##                   Didsbury East                   Didsbury West 
##                             101                             126 
##                     Fallowfield                    Gorton North 
##                             151                             298 
##                    Gorton South                       Harpurhey 
##                             212                             443 
##                 Higher Blackley                           Hulme 
##                             355                             213 
##                     Levenshulme                       Longsight 
##                             199                             187 
## Miles Platting and Newton Heath                       Moss Side 
##                             334                             219 
##                          Moston                      Northenden 
##                             148                             171 
##                        Old Moat                        Rusholme 
##                             136                             172 
##                        Sharston                   Whalley Range 
##                             151                             129 
##                      Withington                  Woodhouse Park 
##                              96                             330

After plotting number of crimes per ward on a .csv file, time to merge with the manchester_merged basemap. Reading in the dataset for crimes per ward using code chunk directly above

summary(crime_ward)
##   geography             crimes          lnCrime     
##  Length:32          Min.   :  96.0   Min.   :4.575  
##  Class :character   1st Qu.: 145.2   1st Qu.:4.985  
##  Mode  :character   Median : 183.0   Median :5.215  
##                     Mean   : 275.7   Mean   :5.380  
##                     3rd Qu.: 330.8   3rd Qu.:5.804  
##                     Max.   :1887.0   Max.   :7.543

Joining crimes per ward dataset with the Manchester basemap

manchester_merged_final <- merge(x= manchester_merged, y=crime_ward, by.x = "name", by.y = "geography")

Plotting map to check if join has succeeded.

library(tmap)
tmap_mode("view")
tm_shape(manchester_merged_final) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
  tm_dots(col = "blue")

All data (popdens, social grade, SES, crime) on ward level loaded, locations of 4 police stations in Manchester city also loaded.

4. Choropleth maps by variables across Manchester

Making a choropleth map by crimes

library(ggplot2)
library(RColorBrewer)
library(classInt)
names(manchester_merged_final)
## [1] "name"        "label"       "altname"     "code"        "popdens"    
## [6] "deprivation" "socialgrade" "crimes"      "lnCrime"
#spplot(manchester_merged_final, "crimes")
# quantile breaks
breaks_qt <- classIntervals(manchester_merged_final$crimes, n = 7, style = "quantile")
br <- breaks_qt$brks 
offs <- 0.0000001 
br[1] <- br[1] - offs 
br[length(br)] <- br[length(br)] + offs 
# categories for choropleth map
manchester_merged_final$crimes_bracket <- cut(manchester_merged_final$crimes, br)
# plot
#my.palette <- brewer.pal(n = 7, name = "OrRd")
#spplot(manchester_merged_final, "crimes_bracket", col.regions=my.palette, main = "Manchester City Recorded Crimes in June 2019 by Ward")

#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
  tm_polygons("crimes", 
              style="quantile", 
              title="Manchester city \nrecorded crimes \nby ward in June 2019")
tmap_mode("view")

Making a choropleth map by popdens

# quantile breaks
breaks_qt <- classIntervals(manchester_merged_final$popdens, n = 7, style = "quantile")
br <- breaks_qt$brks 
offs <- 0.0000001 
br[1] <- br[1] - offs 
br[length(br)] <- br[length(br)] + offs 

# categories for choropleth map
manchester_merged_final$popdens_bracket <- cut(manchester_merged_final$popdens, br)
# plot
#spplot(manchester_merged_final, "popdens_bracket", col.regions=my.palette, main = "Manchester City Population Density by Ward")

#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
  tm_polygons("popdens", 
              style="quantile", 
              title="Manchester city \nPopulation Density \nby ward")
tmap_mode("view")

Making a choropleth map by deprivation

#spplot(manchester_merged_final, "deprivation")
breaks_qt <- classIntervals(manchester_merged_final$deprivation, n = 7, style = "quantile")
br <- breaks_qt$brks 
offs <- 0.0000001 
br[1] <- br[1] - offs 
br[length(br)] <- br[length(br)] + offs 
# categories for choropleth map
manchester_merged_final$deprivation_bracket <- cut(manchester_merged_final$deprivation, br)
# plot

#spplot(manchester_merged_final, "deprivation_bracket", col.regions=my.palette, main = "Manchester City deprivation by Ward")
#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
  tm_polygons("deprivation", 
              style="quantile", 
              title="Manchester city \ndeprivation \nby ward")
tmap_mode("view")

Making a choropleth map by social grade

#spplot(manchester_merged_final, "socialgrade")
breaks_qt <- classIntervals(manchester_merged_final$socialgrade, n = 7, style = "quantile")
br <- breaks_qt$brks 
offs <- 0.0000001 
br[1] <- br[1] - offs 
br[length(br)] <- br[length(br)] + offs 
# categories for choropleth map
manchester_merged_final$socialgrade_bracket <- cut(manchester_merged_final$socialgrade, br)
# plot
#spplot(manchester_merged_final, "socialgrade_bracket", col.regions=my.palette, main = "Manchester City social grade by Ward")
#Using tmap
library(tmap)
tm_shape(manchester_merged_final) +
  tm_polygons("socialgrade", 
              style="quantile", 
              title="Manchester city \nsocial grade \nby ward")
tmap_mode("view")

5. Zooming in on Bradford (single ward)

Next, point pattern analysis will be conducted, to confirm the visual suspicion that there is spatial clustering of crimes within Manchester city. The entire Manchester city was too large to conduct analysis on, and hence this particular study zoomed in on a particular ward - Bradford. Bradford was picked as it has two police stations, so it would make an interesting study to see if the clusters are near the police stations.

We have to create an observation window for spatstat to carry out analysis within – set to the extent of the Bradford ward.

Thus, pull out the Bradford ward.

#extract the Bradford ward
bradford <- manchester_merged_final[manchester_merged_final@data$name=="Bradford",]
#Checking to see if it has been pulled out successfully
tm_shape(bradford) +
  tm_polygons(col = NA, alpha = 0.5)
#sf object:
bradfordSF <- st_as_sf(manchester_merged_final)
bradfordSF <- bradfordSF[bradfordSF$name=="Bradford",]

We need to clip the crime occurence datapoints so that we have a subset of just those that fall within the Bradford ward.

#clip the data to our single ward
library(maptools)
crime_respdf <- readShapePoints("crime_exported.shp", proj4string = CRS("+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"), verbose = FALSE, repair=FALSE)


polsub_bradford <- polsub[bradford,]

Check that it’s worked - blue dots are crime occurrences and red dots are police stations in Bradford

crime_bradford <- crime_respdf[bradford,]
crs(crime_bradford)
## CRS arguments:
##  +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0
tmap_mode("view")+tm_shape(bradford) +tm_polygons(col = NA, alpha = 0.5) +tm_shape(crime_respdf[bradford,]) +  tm_dots(col = "blue") + tm_shape(polsub_bradford) +tm_dots(col = "red")

6. KDE for crimes in Bradford

Create an observation window for spatstat to carry out analysis

library(spatstat)
library(sp)
library(rgeos)
library(maptools)
library(GISTools)
library(tmap)
library(sf)
library(geojson)
library(geojsonio)
library(tmaptools)
BNG = "+init=epsg:27700"
bradfordBNG <- spTransform(bradford,BNG)
window <- as.owin(bradfordBNG)
plot(window)

crime_bradfordBNG <-spTransform(crime_bradford,BNG) 

For point pattern analysis, have to create a point pattern (ppp) object

crime_bradford.ppp <- ppp(x=crime_bradfordBNG@coords[,1],y=crime_bradfordBNG@coords[,2],window=window)

Checking the ppp object

plot(crime_bradford.ppp,pch=16,cex=0.5, main="Crime occurrences in Manchester city, Bradford ward, June 2019")

class(crime_bradford.ppp)
## [1] "ppp"
summary(crime_bradford.ppp)
## Planar point pattern:  333 points
## Average intensity 6.449476e-05 points per square unit
## 
## *Pattern contains duplicated points*
## 
## Coordinates are given to 2 decimal places
## i.e. rounded to the nearest multiple of 0.01 units
## 
## Window: polygonal boundary
## single connected closed polygon with 277 vertices
## enclosing rectangle: [385196.5, 390147.4] x [396899.8, 399214.2] units
## Window area = 5163210 square units
## Fraction of frame area: 0.451
plot(crime_bradford)

Plotting KDE for crime occurences in Bradford ward

ds <- density(crime_bradford.ppp)
class(ds)
## [1] "im"
plot(ds, main = "Crime density in Bradford ward, Manchester city, June 2019")

Doing the same with contours

K1 <- density(crime_bradford.ppp) # Using the default bandwidth
plot(K1, main=NULL, las=1)
contour(K1, add=TRUE)

Changing bandwidth

K2 <- density(crime_bradford.ppp, sigma=50) # Using a 50km bandwidth
plot(K2, main=NULL, las=1)
contour(K2, add=TRUE)

7. Testing for CSR in Bradford

Testing for spatial randomness of crime occurrences in Bradford ward, Manchester city

7a. Ripley’s K

K <- Kest(crime_bradford.ppp, correction="border")
plot(K)

Theoretical model of CSR, compare actual results to that.

7b. Kolmogorov-Smirnoff test for CSR

With point data, we specify a real function T(x,y) defined at all locations x,y in our sampling window. We evaluate this function at each of the data points and compare the empirical values of T with the predicted distribution of T under the CSR assumptions.

ks <- cdf.test(crime_bradford.ppp, "x")
plot(ks)

pval <- ks$p.value
pval
## [1] 1.053552e-09

Density function as a covariate to estimate overall spatial randomness

ds <- density(crime_bradford.ppp)
k <- cdf.test(crime_bradford.ppp, ds)
plot(k)

7c. G function test for CSR

The G function measures the distribution of distances from an arbitrary event to its nearest event (i.e. uses nearest neighbor distances). By plotting our empirically estimated G function against our theoretical expectation if the process were CSR, we can assess the extent to which the empirical distribution of our data is different from the theoretical CSR expectation.

gtest <- Gest(crime_bradford.ppp)
gtest
## Function value object (class 'fv')
## for the function r -> G(r)
## .....................................................................
##         Math.label      Description                                  
## r       r               distance argument r                          
## theo    G[pois](r)      theoretical Poisson G(r)                     
## han     hat(G)[han](r)  Hanisch estimate of G(r)                     
## rs      hat(G)[bord](r) border corrected estimate of G(r)            
## km      hat(G)[km](r)   Kaplan-Meier estimate of G(r)                
## hazard  hat(h)[km](r)   Kaplan-Meier estimate of hazard function h(r)
## theohaz h[pois](r)      theoretical Poisson hazard function h(r)     
## .....................................................................
## Default plot formula:  .~r
## where "." stands for 'km', 'rs', 'han', 'theo'
## Recommended range of argument r: [0, 82.406]
## Available range of argument r: [0, 238.37]
plot(gtest)

## 7d. F test for CSR

ftest <- Fest(crime_bradford.ppp)
ftest
## Function value object (class 'fv')
## for the function r -> F(r)
## .....................................................................
##         Math.label      Description                                  
## r       r               distance argument r                          
## theo    F[pois](r)      theoretical Poisson F(r)                     
## cs      hat(F)[cs](r)   Chiu-Stoyan estimate of F(r)                 
## rs      hat(F)[bord](r) border corrected estimate of F(r)            
## km      hat(F)[km](r)   Kaplan-Meier estimate of F(r)                
## hazard  hat(h)[km](r)   Kaplan-Meier estimate of hazard function h(r)
## theohaz h[pois](r)      theoretical Poisson hazard h(r)              
## .....................................................................
## Default plot formula:  .~r
## where "." stands for 'km', 'rs', 'cs', 'theo'
## Recommended range of argument r: [0, 194.37]
## Available range of argument r: [0, 239.58]
plot(ftest)

8. DBSCAN to determine cluster locations

Carrying out DBSCAN to determine where the clusters are, after having established that CSR does not hold, using the tests above.

library(raster)
library(fpc)
library(plyr)
library(OpenStreetMap)

Checking CRS of the bradford spatial polygon

crs(bradford)
## CRS arguments:
##  +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0
#first extract the points from the spatial points data frame
crime_bradford_points <- data.frame(crime_bradford@coords[,1:2])
#now run the dbscan analysis
db <- fpc::dbscan(crime_bradford_points, eps = 0.003, MinPts = 30)
#now plot the results
plot(db, crime_bradford_points, main = "DBSCAN Output", frame = F)
plot(bradford, add=T)

Can use kNNdistplot() from the dbscan package to find a suitable eps value based on the Ripley’s K plot. Above 300metres sees a sharp spike, so 0.003 would be good.

library(dbscan)
dbscan::kNNdistplot(crime_bradford_points, k =  30)

Using ggplot2 to plot a nicer map

library(ggplot2)

Calling cluster db object

db
## dbscan Pts=333 MinPts=30 eps=0.003
##          0  1  2
## border 214 29 23
## seed     0  6 61
## total  214 35 84
db$cluster
##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [36] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0
##  [71] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [106] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [141] 0 2 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1 1 1
## [176] 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 2 0 2 2 2 2 2 2 0 2 2 2 2
## [211] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [246] 2 2 2 2 2 0 2 2 0 0 2 2 2 2 2 2 0 2 2 2 2 0 2 2 0 2 2 2 2 2 2 2 2 2 2
## [281] 0 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [316] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Adding this information back into crime dataframe

crime_bradford_points$cluster <- db$cluster

Creating convex hull polygons to wrap around the points in the clusters

chulls <- ddply(crime_bradford_points, .(cluster), 
                function(df) df[chull(df$coords.x1, df$coords.x2), ])

Can drop 0 from the dataframe, as they are not in clusters

chulls <- subset(chulls, cluster>=1)

Creating a ggplot object from clusters 1 and 2

dbplot <- ggplot(data=crime_bradford_points, 
                 aes(coords.x1,coords.x2, colour=cluster, fill=cluster)) 
#add the points in
dbplot <- dbplot + geom_point()
#now the convex hulls
#convert spatialpointsdataframe to a dataframe as ggplot cannot handle it
df_polsub_bradford <- data.frame(polsub_bradford)
dbplot <- dbplot + geom_polygon(data = chulls, 
                                aes(coords.x1,coords.x2, group=cluster), 
                                alpha = 0.5) + geom_point(data=df_polsub_bradford,aes(x=coords.x1,y=coords.x2), inherit.aes = FALSE, color="red")
#now plot, setting the coordinates to scale correctly and as a black and white plot 
dbplot + theme_bw() + coord_equal()

Checking CRS for bradford

crs(bradford)
## CRS arguments:
##  +init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0

Get the bbox in lat long

##First get the bbox in lat long for Bradford
latlong <- "+init=epsg:4326" 
bradfordWGS <-spTransform(bradford, CRS(latlong))
bradfordWGS@bbox
##         min       max
## x -2.224532 -2.149895
## y 53.468712 53.489462

Convert basemap to the same CRS

basemap<-openmap(c(53.467712,-2.225532),c(53.489362,-2.149995), zoom=NULL,"stamen-toner")
# convert the basemap to British National Grid - remember we created the 
# BNG object right at the beginning of the practical - it's an epsg string...
basemap_bng<-openproj(basemap, projection="+init=epsg:4326")

Adding a basemap

autoplot(basemap_bng) + geom_point(data=crime_bradford_points, 
                                   aes(coords.x1,coords.x2, 
                                       colour=cluster, fill=cluster)) + 
  geom_polygon(data = chulls, aes(coords.x1,coords.x2, group=cluster, fill=cluster), 
               alpha = 0.5)   + geom_point(data=df_polsub_bradford,aes(x=coords.x1,y=coords.x2), inherit.aes = FALSE, color="red")

Plotting all crime occurences in Manchester city

tm_shape(manchester_merged_final) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(crime_spdf) +
  tm_dots(col = "blue")

9. Obtaining Spatial Weights Matrix (entire Manchester city hereafter)

Calculating the area of each polygon (ward) in Manchester city

library(rgdal)
getClass("Polygon")
## Class "Polygon" [package "sp"]
## 
## Slots:
##                                               
## Name:    labpt    area    hole ringDir  coords
## Class: numeric numeric logical integer  matrix
## 
## Extends: "Line"
manchester_merged_final$Area_sqkm<-sapply(slot(manchester_merged_final, "polygons"), function(x) sapply(slot(x, "Polygons"), slot, "area"))
str(manchester_merged_final$Area_sqkm)
##  num [1:33] 0.000319 0.000215 0.001543 0.000554 0.000518 ...
class(manchester_merged_final$Area_sqkm)
## [1] "numeric"
manchester_merged_final$Area_sqkm <- lapply(manchester_merged_final$Area_sqkm, FUN= function(x) x*12100)

Extracting the area of each ward data from manchester_merged_final

citymanchester.df <- as.data.frame(manchester_merged_final)

citymanchester.df$wardarea <- as.numeric(rownames(citymanchester.df))
class(citymanchester.df$crimes)
## [1] "numeric"
  class(citymanchester.df$Area_sqkm)
## [1] "list"
citymanchester.df$Area_sqkm <- as.numeric(as.character(unlist(citymanchester.df$Area_sqkm)))
citymanchester.df$crimedensity <- citymanchester.df$crimes / citymanchester.df$Area_sqkm

Joining crime density data to the manchester_merged_final map

Reading in the dataset for SES - the average social grade by ward was taken, by using a numeric scale of AB = 1, C1 = 2, C2 = 3, DE = 4, so lower numbers would mean a higher social grade.

crimedensity <- read_csv("crimedensity.csv")
summary(crimedensity)
##   geography          crimedensity   
##  Length:32          Min.   : 17.40  
##  Class :character   1st Qu.: 28.20  
##  Mode  :character   Median : 39.30  
##                     Mean   : 51.33  
##                     3rd Qu.: 53.67  
##                     Max.   :369.80

Joining crime dennsity dataset with the Manchester basemap

manchester_final <- merge(manchester_merged_final, crimedensity, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)

Plotting map to check if third join has succeeded

tmap_mode("view")
tm_shape(manchester_final) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(polsub) +
  tm_dots(col = "blue")

Adding number of police stations to Manchester_final

#reading in data for police stations per ward
library(readr)
policestns <- read_csv("policestns.csv")
manchester_final <- merge(manchester_final, policestns, by.x = "name", by.y = "geography",duplicateGeoms = TRUE)

Some clustering in Manchester itself based on KDE diagrams above, and some clustering in Bradford ward (to be analysed later) Before being able to calculate Moran’s I and any similar statistics, we need to first define a spatial weights matrix

library(spdep)

Calculating the centroids of all wards in Manchester city

coordsWards <- coordinates(manchester_final)
plot(coordsWards)

Generating a spatial weights matrix, using contiguity edges corners.

#create a neighbours list
MWard_nb <- poly2nb(manchester_final, queen=T) #this is a neighbours list of queens contiguity
knn_wards <- knearneigh(coordsWards, k=4) #THIS IS A NEIGHBOURS LIST FOR NEAREST NEIGHBOURS CONTIGUITY

MWard_knn <- knn2nb(knn_wards)
#plot neighbours list of queens contiguity
plot(MWard_nb, coordinates(coordsWards), col="red") + plot(manchester_final, add=T)

## integer(0)
#plot neighbours list of nearest neighbours contiguity
plot(MWard_knn, coordinates(coordsWards), col="red") + plot(manchester_final, add=T)

## integer(0)

Create spatial weights object from weights

Mward.queens_weight <- nb2listw(MWard_nb, style="W")
head(Mward.queens_weight)
## $style
## [1] "W"
## 
## $neighbours
## Neighbour list object:
## Number of regions: 33 
## Number of nonzero links: 138 
## Percentage nonzero weights: 12.67218 
## Average number of links: 4.181818 
## 
## $weights
## $weights[[1]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[2]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## 
## $weights[[3]]
## [1] 0.5 0.5
## 
## $weights[[4]]
## [1] 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571
## 
## $weights[[5]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[6]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[7]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## $weights[[8]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## 
## $weights[[9]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[10]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[11]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## $weights[[12]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## $weights[[13]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[14]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[15]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[16]]
## [1] 0.5 0.5
## 
## $weights[[17]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## 
## $weights[[18]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[19]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[20]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[21]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[22]]
## [1] 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571 0.1428571
## 
## $weights[[23]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[24]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## $weights[[25]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[26]]
## [1] 0.5 0.5
## 
## $weights[[27]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## $weights[[28]]
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## 
## $weights[[29]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[30]]
## [1] 0.2 0.2 0.2 0.2 0.2
## 
## $weights[[31]]
## [1] 0.25 0.25 0.25 0.25
## 
## $weights[[32]]
## [1] 0.3333333 0.3333333 0.3333333
## 
## $weights[[33]]
## [1] 1
## 
## attr(,"mode")
## [1] "binary"
## attr(,"W")
## [1] TRUE
## attr(,"comp")
## attr(,"comp")$d
##  [1] 4 6 2 7 4 5 3 6 4 4 3 3 4 5 4 2 6 5 4 4 5 7 4 3 5 2 3 6 5 5 4 3 1
Mward.nn_weight <- nb2listw(MWard_knn, style="W")

10. Spatial autocorrelation

Now we have defined our Wij matrix, we can calculate the Moran’s I and other associated statistics.

10a. Moran’s I across Manchester city

Moran’s I test tells us whether we have clustered values (close to 1) or dispersed values (close to -1), we will calculate for the densities rather than raw values.

I_MWard_Global_Density <- moran.test(manchester_final@data$crimedensity, Mward.queens_weight)
I_MWard_Global_Density
## 
##  Moran I test under randomisation
## 
## data:  manchester_final@data$crimedensity  
## weights: Mward.queens_weight    
## 
## Moran I statistic standard deviate = 2.4325, p-value = 0.007498
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.089142919      -0.031250000       0.002449644

10b. Geary’s C across Manchester city

Geary’s C test: insufficient evidence to reject the null hypothesis that similar values of crime densities are clustering

C_MWard_Global_Density <- geary.test(manchester_final@data$crimedensity, Mward.queens_weight)
C_MWard_Global_Density
## 
##  Geary C test under randomisation
## 
## data:  manchester_final@data$crimedensity 
## weights: Mward.queens_weight 
## 
## Geary C statistic standard deviate = 0.24572, p-value = 0.4029
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##        0.95945997        1.00000000        0.02721973

Calculate local versions of Moran’s I statistic for each ward to see in which wards the hot-spots are

#use the localmoran function to generate I for each ward in the city
I_MWard_Local <- localmoran(manchester_final@data$crimes, Mward.queens_weight)
I_MWard_Local_Density <- localmoran(manchester_final@data$crimedensity, Mward.queens_weight)
#what does the output (the localMoran object) look like?
head(I_MWard_Local_Density)
##             Ii     E.Ii     Var.Ii       Z.Ii Pr(z > 0)
## 0   0.05492635 -0.03125 0.06619785 0.33493932 0.3688354
## 1   0.05140974 -0.03125 0.05039111 0.36822824 0.3563515
## 2   0.21394309 -0.03125 0.11361807 0.72741881 0.2334847
## 47  0.06948539 -0.03125 0.04587490 0.47032117 0.3190628
## 48  0.18555654 -0.03125 0.06619785 0.84265625 0.1997104
## 52 -0.01997972 -0.03125 0.05671380 0.04732498 0.4811271

Copying I score and p-valule back into the manchester_final spatialpolygonsdataframe

manchester_final@data$BLocI <- I_MWard_Local[,1]
manchester_final@data$BLocIz <- I_MWard_Local[,4]
manchester_final@data$BLocIR <- I_MWard_Local_Density[,1]
manchester_final@data$BLocIRz <- I_MWard_Local_Density[,4]

Set breaks manually

breaks1<-c(-1000,-2.58,-1.96,-1.65,1.65,1.96,2.58,1000)

Setting colour palette so higher values are redder

MoranColours<- rev(brewer.pal(8, "RdGy"))

Plotting the map

tm_shape(manchester_final) +
    tm_polygons("BLocIRz",
        style="fixed",
        breaks=breaks1,
        palette=MoranColours,
        midpoint=NA,
        title="Local Moran's I, Crime Occurences in Manchester by ward")

11. OLS regression

Converting manchester_final to a dataframe

manchester_df <- as.data.frame(manchester_final)
#define regression equation so no need to retype each time
reg=crimes~popdens+socialgrade+deprivation+polstns

social grade variable’s frequency distribution seems to not be normal - negatively skewed (more higher values translating to LOWER social grade). Check the result of a range of transformations along Tukey’s ladder.

library(car)
symbox(~`socialgrade`, manchester_df, na.rm=T, powers=seq(-3,3,by=.5))

deprivation variable’s frequency distribution seems to not be normal - positively skewed (more higher values translating to GREATER deprivation). Check the result of a range of transformations along Tukey’s ladder.

library(car)
symbox(~`deprivation`, manchester_df, na.rm=T, powers=seq(-3,3,by=.5))

Trying deprivation^2 for a more normal distribution

ggplot(manchester_df, aes(x=(`deprivation`)^2)) + geom_histogram()

Plotting it out on a scatterplot

qplot(x = (`deprivation`)^2, y = `crimes`, data=manchester_df)

qplot(x = (deprivation), y = crimes, data=manchester_df) Plotting crimes as dependent variable, OLS for popdens, socialgrade, deprivation

m <- lm(log1p(crimes) ~ popdens + deprivation + polstns, data=manchester_df)
summary(m)
## 
## Call:
## lm(formula = log1p(crimes) ~ popdens + deprivation + polstns, 
##     data = manchester_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4695 -0.3300 -0.1003  0.1369  2.5334 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.552933   0.623038   7.308 4.76e-08 ***
## popdens     -0.003269   0.004834  -0.676   0.5043    
## deprivation  0.911520   0.494899   1.842   0.0757 .  
## polstns      0.042547   0.263064   0.162   0.8726    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5787 on 29 degrees of freedom
## Multiple R-squared:  0.1437, Adjusted R-squared:  0.05512 
## F-statistic: 1.622 on 3 and 29 DF,  p-value: 0.2057

Check if residuals are normally distributed

#save the residuals into your dataframe
manchester_df$m_resids <- m$residuals

qplot(m$residuals) + geom_histogram() 

Check for multicollinearity between explanatory variables

library(corrplot)
#pull out the columns we want to check for multicolinearity
tempdf <- manchester_df[,c("popdens","deprivation")]


#rename the columns to something shorter
names(tempdf) <- c("Med House Price", "Un Auth Absence")

#compute the correlation matrix for the two variables of interest
cormat <- cor(tempdf[,1:2], use="complete.obs", method="pearson")

#visualise the correlation matrix
corrplot(cormat)

Checking VIF to confirm no multicollinearity.

vif(m)
##     popdens deprivation     polstns 
##    1.113247    1.069619    1.139611

12. Checking for spatial dependence

12a. Checking residuals

Check if any spatial dependence in the residuals, first using queens neighbours

lm.morantest(m,Mward.queens_weight)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.queens_weight
## 
## Moran I statistic standard deviate = 3.7725, p-value = 8.08e-05
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##       0.36845178      -0.05835548       0.01279961

Then using nn

lm.morantest(m,Mward.nn_weight)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
## 
## Moran I statistic standard deviate = 3.4091, p-value = 0.0003259
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##      0.269773641     -0.062950514      0.009525663

12b. Checking using Lagrange Multiplier Tests

Another way to check if necessary to run a spatial model based on the OLS is to do lagrange multiplier tests.

lm.LMtests(m,Mward.nn_weight,test=c("LMerr", "LMlag", "RLMerr", "RLMlag", "SARMA"))
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
## 
## LMerr = 5.4191, df = 1, p-value = 0.01992
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
## 
## LMlag = 8.307, df = 1, p-value = 0.003949
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
## 
## RLMerr = 2.1493, df = 1, p-value = 0.1426
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
## 
## RLMlag = 5.0371, df = 1, p-value = 0.02481
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = log1p(crimes) ~ popdens + deprivation +
## polstns, data = manchester_df)
## weights: Mward.nn_weight
## 
## SARMA = 10.456, df = 2, p-value = 0.005364

13. Spatially Lagged X (SLX) model

Start with a spatially lagged x model. Spatially lagged x model – y=Xß+WXT+e.

library(spatialreg)
reg2=lmSLX(m,data=manchester_df, Mward.queens_weight)
summary(reg2)
## 
## Call:
## lm(formula = formula(paste("y ~ ", paste(colnames(x)[-1], collapse = "+"))), 
##     data = as.data.frame(x), weights = weights)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98704 -0.25621 -0.07405  0.15080  1.78257 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      2.6518400  0.8902668   2.979   0.0062 **
## popdens         -0.0003125  0.0051387  -0.061   0.9520   
## deprivation     -0.0568893  0.6200070  -0.092   0.9276   
## polstns          0.3230968  0.2459157   1.314   0.2004   
## lag.popdens      0.0061168  0.0078331   0.781   0.4419   
## lag.deprivation  2.1330028  0.9363487   2.278   0.0312 * 
## lag.polstns      0.9902573  0.5543479   1.786   0.0857 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5082 on 26 degrees of freedom
## Multiple R-squared:  0.4079, Adjusted R-squared:  0.2713 
## F-statistic: 2.985 on 6 and 26 DF,  p-value: 0.02357

14. Spatial Lag Model (SLM)

Running the SLM with a queen’s case weights matrix. Spatial Lag (Autoregressive) Model - y=pWy+XB+e

reg3=lagsarlm(m,data=manchester_df, Mward.nn_weight)
summary(reg3)
## 
## Call:lagsarlm(formula = m, data = manchester_df, listw = Mward.nn_weight)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.662125 -0.215728 -0.097012  0.121927  2.129947 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  1.8755373  0.9061059  2.0699  0.03846
## popdens     -0.0027521  0.0038313 -0.7183  0.47256
## deprivation  0.3381174  0.3945617  0.8569  0.39148
## polstns      0.0265561  0.2069473  0.1283  0.89789
## 
## Rho: 0.6108, LR test value: 8.4388, p-value: 0.0036729
## Asymptotic standard error: 0.14954
##     z-value: 4.0844, p-value: 4.4195e-05
## Wald statistic: 16.682, p-value: 4.4195e-05
## 
## Log likelihood: -22.42409 for lag model
## ML residual variance (sigma squared): 0.20723, (sigma: 0.45523)
## Number of observations: 33 
## Number of parameters estimated: 6 
## AIC: 56.848, (AIC for lm: 63.287)
## LM test for residual autocorrelation
## test value: 2.1553, p-value: 0.14208

Check overall impacts, as infinite feedback loops so p-values for individual variables are meaningless

impacts(reg3,listw=Mward.queens_weight)
## Impact measures (lag, exact):
##                  Direct     Indirect        Total
## popdens     -0.00314703 -0.003924004 -0.007071035
## deprivation  0.38664119  0.482099500  0.868740695
## polstns      0.03036718  0.037864571  0.068231752
summary(impacts(reg3,listw=Mward.queens_weight,R=1000),zstats=TRUE) #Add zstats,pvals
## Impact measures (lag, exact):
##                  Direct     Indirect        Total
## popdens     -0.00314703 -0.003924004 -0.007071035
## deprivation  0.38664119  0.482099500  0.868740695
## polstns      0.03036718  0.037864571  0.068231752
## ========================================================
## Simulation results (asymptotic variance matrix):
## Direct:
## 
## Iterations = 1:997
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 997 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                  Mean       SD  Naive SE Time-series SE
## popdens     -0.003306 0.004709 0.0001491      0.0001491
## deprivation  0.380780 0.474955 0.0150420      0.0150420
## polstns      0.025214 0.254503 0.0080602      0.0080602
## 
## 2. Quantiles for each variable:
## 
##                 2.5%       25%       50%       75%   97.5%
## popdens     -0.01209 -0.006401 -0.003262 -0.000275 0.00568
## deprivation -0.51158  0.071980  0.372939  0.704441 1.30224
## polstns     -0.46446 -0.138039  0.019546  0.182830 0.53823
## 
## ========================================================
## Indirect:
## 
## Iterations = 1:997
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 997 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                  Mean      SD  Naive SE Time-series SE
## popdens     -0.005456 0.02434 0.0007708      0.0008682
## deprivation  0.605157 1.66843 0.0528398      0.0589077
## polstns      0.098095 0.78620 0.0248993      0.0263214
## 
## 2. Quantiles for each variable:
## 
##                 2.5%       25%       50%        75%   97.5%
## popdens     -0.02574 -0.007902 -0.003559 -0.0002303 0.01085
## deprivation -1.00293  0.091026  0.402778  0.8285394 3.13574
## polstns     -0.82812 -0.159029  0.023264  0.2165952 1.48515
## 
## ========================================================
## Total:
## 
## Iterations = 1:997
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 997 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                  Mean      SD  Naive SE Time-series SE
## popdens     -0.008762 0.02723 0.0008622       0.000954
## deprivation  0.985938 1.98355 0.0628195       0.068588
## polstns      0.123308 0.98003 0.0310378       0.033341
## 
## 2. Quantiles for each variable:
## 
##                 2.5%      25%       50%       75%  97.5%
## popdens     -0.03684 -0.01506 -0.007513 -0.000591 0.0155
## deprivation -1.47234  0.19378  0.849485  1.529969 4.2874
## polstns     -1.26404 -0.30803  0.048123  0.420586 1.9767
## 
## ========================================================
## Simulated standard errors
##                  Direct  Indirect      Total
## popdens     0.004709365 0.0243376 0.02722546
## deprivation 0.474954507 1.6684331 1.98354556
## polstns     0.254502572 0.7862019 0.98002871
## 
## Simulated z-values:
##                  Direct   Indirect      Total
## popdens     -0.70195683 -0.2241828 -0.3218253
## deprivation  0.80171963  0.3627100  0.4970582
## polstns      0.09907033  0.1247704  0.1258212
## 
## Simulated p-values:
##             Direct  Indirect Total  
## popdens     0.48271 0.82262  0.74759
## deprivation 0.42272 0.71682  0.61915
## polstns     0.92108 0.90071  0.89987

15. Spatial Error Model (SEM)

Next do a SEM

reg4=errorsarlm(m,data=manchester_df, Mward.queens_weight)
summary(reg4)
## 
## Call:
## errorsarlm(formula = m, data = manchester_df, listw = Mward.queens_weight)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.538925 -0.245918 -0.052433  0.128232  2.009450 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  5.3364619  0.6052271  8.8173   <2e-16
## popdens     -0.0045779  0.0042227 -1.0841   0.2783
## deprivation  0.2379737  0.4909464  0.4847   0.6279
## polstns     -0.1408868  0.1842669 -0.7646   0.4445
## 
## Lambda: 0.61019, LR test value: 9.9545, p-value: 0.0016046
## Asymptotic standard error: 0.14506
##     z-value: 4.2064, p-value: 2.5947e-05
## Wald statistic: 17.694, p-value: 2.5947e-05
## 
## Log likelihood: -21.66624 for error model
## ML residual variance (sigma squared): 0.19362, (sigma: 0.44002)
## Number of observations: 33 
## Number of parameters estimated: 6 
## AIC: 55.332, (AIC for lm: 63.287)

16. GWR and its plots

Trying GWR

library(spgwr)
#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(log1p(crimes) ~ popdens + deprivation + polstns, data=manchester_df, coords=coordsWards,adapt=T)
## Adaptive q: 0.381966 CV score: 11.40972 
## Adaptive q: 0.618034 CV score: 11.6588 
## Adaptive q: 0.236068 CV score: 11.47475 
## Adaptive q: 0.3657329 CV score: 11.41114 
## Adaptive q: 0.3896803 CV score: 11.41104 
## Adaptive q: 0.3779032 CV score: 11.4095 
## Adaptive q: 0.3771599 CV score: 11.4095 
## Adaptive q: 0.3773912 CV score: 11.4095 
## Adaptive q: 0.3774319 CV score: 11.4095 
## Adaptive q: 0.3773505 CV score: 11.4095 
## Adaptive q: 0.3773912 CV score: 11.4095
#run the gwr model
gwr.model = gwr(m, coords=coordsWards, adapt=GWRbandwidth, hatmatrix=TRUE, se.fit=TRUE)

#print the results of the model
gwr.model
## Call:
## gwr(formula = m, coords = coordsWards, adapt = GWRbandwidth, 
##     hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.3773912 (about 12 of 33 data points)
## Summary of GWR coefficient estimates at data points:
##                     Min.     1st Qu.      Median     3rd Qu.        Max.
## X.Intercept.  4.34333309  4.45449801  4.76115937  5.19473665  5.60632329
## popdens      -0.00734385 -0.00589415 -0.00432275 -0.00226600 -0.00073926
## deprivation   0.31889085  0.52964949  0.79842305  0.88945093  0.99184834
## polstns      -0.01367024  0.01095601  0.02308837  0.03409300  0.04536910
##               Global
## X.Intercept.  4.5529
## popdens      -0.0033
## deprivation   0.9115
## polstns       0.0425
## Number of data points: 33 
## Effective number of parameters (residual: 2traceS - traceS'S): 8.1081 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 24.8919 
## Sigma (residual: 2traceS - traceS'S): 0.552994 
## Effective number of parameters (model: traceS): 6.571103 
## Effective degrees of freedom (model: traceS): 26.4289 
## Sigma (model: traceS): 0.5366732 
## Sigma (ML): 0.4802777 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 65.70114 
## AIC (GWR p. 96, eq. 4.22): 51.81726 
## Residual sum of squares: 7.612001 
## Quasi-global R2: 0.3288733

Collect results

results<-as.data.frame(gwr.model$SDF)
names(results)
##  [1] "sum.w"               "X.Intercept."        "popdens"            
##  [4] "deprivation"         "polstns"             "X.Intercept._se"    
##  [7] "popdens_se"          "deprivation_se"      "polstns_se"         
## [10] "gwr.e"               "pred"                "pred.se"            
## [13] "localR2"             "X.Intercept._se_EDF" "popdens_se_EDF"     
## [16] "deprivation_se_EDF"  "polstns_se_EDF"      "pred.se.1"          
## [19] "coord.x"             "coord.y"

Attach results to original dataframe

#attach coefficients to original dataframe
manchester_final@data$coefpopdens<-results$popdens
manchester_final@data$coefdeprivation<-results$deprivation
manchester_final@data$coefpolstns<-results$polstns

Plotting GWR result for popdens

tm_shape(manchester_final) +
  tm_polygons(col = "coefpopdens", palette = "RdBu", alpha = 0.5)

Plotting GWR results for deprivation

tm_shape(manchester_final) +
  tm_polygons(col = "coefdeprivation", palette = "RdBu", alpha = 0.5) 

Plotting GWR results for police stations

tm_shape(manchester_final) +
  tm_polygons(col = "coefpolstns", palette = "RdBu", alpha = 0.5) 

Calculating standard errors for statistical significance for population density

#run the significance test
#sigTest = abs(gwr.model$SDF$"log(`Median House Price (£) - 2014`)") -2 * gwr.model$SDF$"log(`Median House Price (£) - 2014`)_se"

sigTest = abs(gwr.model$SDF$"popdens") -2 * gwr.model$SDF$"popdens_se"

#store significance results
manchester_final$GWRpopdens<-sigTest

Plotting significance for population density on the map

tm_shape(manchester_final) +
  tm_polygons(col = "GWRpopdens", palette = "RdYlBu")

Plotting significance for deprivation on the map

#run the significance test
sigTest = abs(gwr.model$SDF$"deprivation") -2 * gwr.model$SDF$"deprivation_se"

#store significance results
manchester_final$GWRdeprivation<-sigTest
#plotting
tm_shape(manchester_final) +
  tm_polygons(col = "GWRdeprivation", palette = "RdYlBu")
sigTest
##  [1] -0.214446417 -0.048627036 -0.042102614 -0.506553445 -0.096120189
##  [6] -0.237855939 -0.518978569 -0.182680631 -0.779023348 -0.409469910
## [11] -0.128028815 -0.002661584 -0.501554521 -0.605923711 -0.152284373
## [16] -0.187226045 -0.243262200 -0.447880588 -0.513664133 -0.058096884
## [21] -0.135267178 -0.266873828 -0.210703630 -0.109190368 -0.715541184
## [26] -0.370186631 -0.300053543 -0.053356652 -0.228826888 -0.188139143
## [31] -0.674957244 -0.101914571 -0.081830056

Plotting significance for police stations on the map

#run the significance test
sigTest = abs(gwr.model$SDF$"polstns") -2 * gwr.model$SDF$"polstns_se"

#store significance results
manchester_final$GWRpolstns<-sigTest
#plotting
tm_shape(manchester_final) +
  tm_polygons(col = "GWRpolstns", palette = "RdYlBu")